In this script we are going to map myeloid cell subtypes onto the Visium slides.
library(Seurat)
library(ggpubr)
library(cowplot)
library(dplyr)
library(ggplot2)
library(stringr)
library(readr)
library(SPOTlight)
Loading necessary paths and parameters
set.seed(123)
source(here::here("misc/paths.R"))
source(here::here("utils/bin.R"))
"{myeloid}/{plt_dir}" %>%
glue::glue() %>%
here::here() %>%
dir.create(path = ,
showWarnings = FALSE,
recursive = TRUE)
"{myeloid}/{robj_dir}" %>%
glue::glue() %>%
here::here() %>%
dir.create(path = ,
showWarnings = FALSE,
recursive = TRUE)
myel_vec <- c(
"IFN1+ PDC", "PDC", "aDC1", "aDC2", "aDC3", "C1Q HLA macrophages", "Cycling",
"DC1 mature", "DC1 precursor", "DC2", "DC3", "DC4", "DC5", "IL7R DC",
"IL7R MMP12 macrophages", "ITGAX ZEB2 macrophages", "M1 Macrophages",
"Mast cells", "Monocytes", "Neutrophil Granulocytes",
"SELENOP FUCA1 PTGDS macrophages")
Extract sample id and get Donor ID
# sample_id <- params$sample_id
sample_id <- "esvq52_nluss5"
donor_id <- id_sp_df[id_sp_df$gem_id == sample_id, ] %>% dplyr::pull(donor_id)
The spatial data comes from the script 03-clustering/03-clustering_integration.Rmd while the sc data can be found in Ramon’s scRNAseq analysis: /scratch/devel/rmassoni/tonsil_atlas_private/2-DOWNSTREAM_PROCESSING/results/R_objects/processed_seurat_objects/processed_seurat_objects/tonsil_integrated_with_harmony_scrublet_annotated.rds.
sp_obj <- "misc/{robj_dir}/20220215_tonsil_atlas_spatial_seurat_obj.rds" %>%
glue::glue() %>%
here::here() %>%
readRDS(file = .)
# Single cell data
sc_obj <- "misc/{robj_dir}/20220215_tonsil_atlas_rna_seurat_obj.rds" %>%
glue::glue() %>%
here::here() %>%
readRDS(file = .)
We are going to use SPOTlight to deconvolute the spatial spots and map the cell types.
We will start by adding the level5 definitive metadata labels to the full object.
sc_obj@meta.data <- sc_obj@meta.data %>%
dplyr::mutate(
# Annotation for SPOTlight
annotation_deconv = dplyr::if_else(
annotation_20220215 %in% myel_vec,
as.character(annotation_20220215), as.character(annotation_level_1))
)
# Remove preBC, preTC since we are not interested in them and they are minor pops
# Remove myeloid since all the "myeloid" cells are already in lvl5
sc_obj <- sc_obj[, ! sc_obj$annotation_deconv %in% c("preBC", "preTC", "myeloid")]
table(sc_obj@meta.data$annotation_deconv)
##
## aDC1 aDC2 aDC3 C1Q HLA macrophages CD4_T Cycling Cytotoxic DC1 mature DC1 precursor DC2 DC3 DC4 DC5 epithelial FDC GCBC IFN1+ PDC IL7R DC IL7R MMP12 macrophages ITGAX ZEB2 macrophages M1 Macrophages Mast cells Monocytes NBC_MBC Neutrophil Granulocytes PC PDC SELENOP FUCA1 PTGDS macrophages
## 81 16 83 301 58783 70 6009 86 104 181 20 28 95 361 1203 72174 50 27 166 171 72 37 93 112478 155 9088 432 446
# level 5 Myeloid T Cells
exist <- myel_vec %in% unique(sc_obj$annotation_deconv)
names(exist) <- myel_vec
exist
## IFN1+ PDC PDC aDC1 aDC2 aDC3 C1Q HLA macrophages Cycling DC1 mature DC1 precursor DC2 DC3 DC4 DC5 IL7R DC IL7R MMP12 macrophages ITGAX ZEB2 macrophages M1 Macrophages Mast cells Monocytes Neutrophil Granulocytes SELENOP FUCA1 PTGDS macrophages
## TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
table(exist)
## exist
## TRUE
## 21
lvl5_mye_obj <- sc_obj[, sc_obj$annotation_deconv %in% myel_vec]
Since the normalization was carried out with the entire dataset we are going to do normalization again within this subset
sc_obj <- Seurat::NormalizeData(
sc_obj,
normalization.method = "LogNormalize",
scale.factor = 10000,
verbose = FALSE) %>%
Seurat::FindVariableFeatures(., nfeatures = 3000, verbose = FALSE) %>%
Seurat::ScaleData(., verbose = FALSE)
1st we need to determine the marker genes for each high-level cluster
markers_lvl1_out <- "{myeloid}/{robj_dir}/markers_lvl1_myeloid.RDS" %>%
glue::glue() %>%
here::here()
# file.remove(markers_lvl1_out)
if (file.exists(markers_lvl1_out)) {
markers_lvl1 <- readRDS(file = markers_lvl1_out)
} else {
#### Extract marker genes from each cluster ####
Seurat::Idents(object = sc_obj) <- sc_obj@meta.data[, "annotation_level_1"]
markers_lvl1 <- Seurat::FindAllMarkers(
object = sc_obj,
assay = "RNA",
slot = "data",
verbose = TRUE,
only.pos = TRUE,
max.cells.per.ident = 200)
saveRDS(object = markers_lvl1, file = markers_lvl1_out)
}
DT::datatable(markers_lvl1, filter = "top")
Now find the markers for Myeloid cells level 5 only between them
markers_lvl5_out <- "{myeloid}/{robj_dir}/markers_lvl5_myeloid.RDS" %>%
glue::glue() %>%
here::here()
# file.remove(markers_lvl5_out)
if (file.exists(markers_lvl5_out)) {
markers_lvl5 <- readRDS(file = markers_lvl5_out)
} else {
#### Extract marker genes from each cluster ####
Seurat::Idents(object = lvl5_mye_obj) <- lvl5_mye_obj@meta.data[, "annotation_deconv"]
markers_lvl5 <- Seurat::FindAllMarkers(
object = lvl5_mye_obj,
assay = "RNA",
slot = "data",
verbose = TRUE,
only.pos = TRUE,
max.cells.per.ident = 200)
saveRDS(object = markers_lvl5, file = markers_lvl5_out)
}
DT::datatable(markers_lvl5, filter = "top")
Join markers from both runs. Since we still want to keep the CD4 T cells markers in the lower level markers we will add them to each one.
# Keep markers for major cell types
markers_lvl1_sub <- markers_lvl1 %>%
dplyr::filter( ( avg_log2FC > 1 | pct.1 - pct.2 > 0.75) &
p_val_adj < 0.01 & pct.1 > 0.3 ) %>%
dplyr::filter(cluster != "myeloid")
Add general myeloid cell markers to the specific cell types
markers_lvl5_update <- lapply(as.character(unique(markers_lvl5$cluster)),
function(i) {
tmp <- markers_lvl5 %>%
dplyr::filter(cluster == i) %>%
dplyr::mutate(cluster = i) %>%
dplyr::filter(avg_log2FC > 0.75 | pct.1 - pct.2 > 0.3)
# Only add myeloid to myeloid
if (str_detect(string = i, pattern = "PDC", negate = TRUE)) {
tmp <- tmp %>%
dplyr::bind_rows(markers_lvl1 %>%
filter(cluster == "myeloid" & avg_log2FC > 1.5))
}
tmp
}) %>% bind_rows()
Combine level-1 and level-5 markers
all_markers <- dplyr::bind_rows(markers_lvl1_sub, markers_lvl5_update)
Subset SC object so all the cells from the same cell type to come from the same batch
# subset most representative sample(s)
ns <- with(sc_obj@meta.data, table(gem_id, annotation_deconv))
ns <- as.matrix(unclass(ns))
m <- 100 # Max cells per cell type
# Extract cell barcodes
meta <- sc_obj@meta.data
id <- lapply(colnames(ns), function(nm) {
x <- ns[, nm]
# Initialize variables
n <- 0 # N of cells
s <- c() # Gem IDs
b <- c() # Cell barcodes
while (n < m & length(x) > 0) {
# select gem id with the most cells
i <- which.max(x)
# Add gem id to vector s
s <- c(s, names(i))
# Add number of cells per cell type to n
n <- n + x[i]
# Remove gem id from x to move on to the next
x <- x[-i]
# extract barcode
barcode <- rownames(meta[meta[, "gem_id"] == names(i) &
meta[, "annotation_deconv"] == nm, ])
# make sure it adds up to m
# print(barcode)
if (length(b) + length(barcode) > m) {
barcode <- sample(x = barcode, size = m - length(b), replace = FALSE)
}
b <- c(b, barcode) # Cell barcodes
# print(b)
}
return(b)
})
sc_sub <- sc_obj[, unlist(id)]
table(sc_sub$annotation_deconv)
##
## aDC1 aDC2 aDC3 C1Q HLA macrophages CD4_T Cycling Cytotoxic DC1 mature DC1 precursor DC2 DC3 DC4 DC5 epithelial FDC GCBC IFN1+ PDC IL7R DC IL7R MMP12 macrophages ITGAX ZEB2 macrophages M1 Macrophages Mast cells Monocytes NBC_MBC Neutrophil Granulocytes PC PDC SELENOP FUCA1 PTGDS macrophages
## 81 16 83 100 100 70 100 86 100 100 20 28 95 100 100 100 50 27 100 100 72 37 93 100 100 100 100 100
2nd run deconvolution
decon_fn <- "{myeloid}/{robj_dir}/spotlight_ls_myeloid.rds" %>%
glue::glue() %>%
here::here()
# if (! file.exists(decon_fn) ) {
saveRDS(
object = spotlight_ls,
file = decon_fn)
# } else {
# spotlight_ls <- readRDS(file = here::here(glue::glue("{myeloid}/{robj_dir}/spotlight_ls_myeloid.rds")))
# }
3rd Check Topic profiles
nmf_mod_ls <- spotlight_ls[[1]]
nmf_mod <- nmf_mod_ls[[1]]
h <- NMF::coef(nmf_mod)
rownames(h) <- paste("Topic", 1:nrow(h), sep = "_")
topic_profile_plts <- SPOTlight::dot_plot_profiles_fun(
h = h,
train_cell_clust = nmf_mod_ls[[2]])
topic_profile_plts[[2]] +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90),
axis.text = ggplot2::element_text(size = 12))
decon_mtrx <- spotlight_ls[[2]]
decon_mtrx <- decon_mtrx[, colnames(decon_mtrx) != "res_ss"]
# Set as 0 cell types predicted to be under 1 % of the spot
# decon_mtrx[decon_mtrx < 0.03] <- 0
Change column names
nm_df <- data.frame(
mod_nm = stringr::str_replace_all(string = unique(all_markers$cluster),
pattern = "[[:punct:]]|\\s|\\+" ,
replacement = "."),
plt_nm = unique(all_markers$cluster)
)
# Save nm_df
"{myeloid}/{robj_dir}/myeloid_nm_df.rds" %>%
glue::glue() %>%
here::here() %>%
saveRDS(
object = nm_df,
file = .)
new_cn <- data.frame(mod_nm = colnames(decon_mtrx)) %>%
dplyr::left_join(nm_df, by = "mod_nm") %>%
# Central.Mem.PASK. fives some trouble because it only changes between + an -
# negative goes first and distinct solves it automatically
dplyr::distinct() %>%
dplyr::pull(plt_nm)
colnames(decon_mtrx) <- new_cn
We are going to add the deconvolution to the Seurat object.
metadata <- cbind(sp_obj@meta.data, decon_mtrx)
sp_obj@meta.data <- metadata
Look at cells topic profile
basis_spotlight <- data.frame(NMF::basis(spotlight_ls[[1]][[1]]))
train_labs <- spotlight_ls[[1]][[2]]
colnames(basis_spotlight) <- unique(stringr::str_wrap(train_labs, width = 30))
basis_spotlight[basis_spotlight < 0.0000001] <- 0
DT::datatable(basis_spotlight, filter = "top")
sessionInfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-conda_cos6-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server release 6.7 (Santiago)
##
## Matrix products: default
## BLAS/LAPACK: /scratch/groups/singlecell/software/anaconda3/envs/spatial_r/lib/libopenblasp-r0.3.12.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=es_ES.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tidyr_1.2.0 nnls_1.4 edgeR_3.32.1 limma_3.46.0 Matrix_1.4-0 NMF_0.23.0 Biobase_2.50.0 BiocGenerics_0.36.1 cluster_2.1.2 rngtools_1.5.2 pkgmaker_0.32.2 registry_0.5-1 tibble_3.1.6 purrr_0.3.4 SPOTlight_0.1.7 readr_2.1.2 stringr_1.4.0 dplyr_1.0.8 cowplot_1.1.1 ggpubr_0.4.0 ggplot2_3.3.5 SeuratObject_4.0.4 Seurat_4.1.0
##
## loaded via a namespace (and not attached):
## [1] backports_1.4.1 plyr_1.8.6 igraph_1.2.11 lazyeval_0.2.2 splines_4.0.1 crosstalk_1.2.0 listenv_0.8.0 scattermore_0.8 gridBase_0.4-7 digest_0.6.29 foreach_1.5.2 htmltools_0.5.2 fansi_1.0.2 magrittr_2.0.2 doParallel_1.0.17 tensor_1.5 ROCR_1.0-11 tzdb_0.2.0 globals_0.14.0 matrixStats_0.61.0 vroom_1.5.7 spatstat.sparse_2.1-0 colorspace_2.0-3 ggrepel_0.9.1 xfun_0.30 crayon_1.5.0 jsonlite_1.8.0 spatstat.data_2.1-2 iterators_1.0.14 survival_3.3-1 zoo_1.8-9 glue_1.6.2 polyclip_1.10-0 gtable_0.3.0 leiden_0.3.9 car_3.0-12 future.apply_1.8.1 abind_1.4-5 scales_1.1.1 DBI_1.1.2 rstatix_0.7.0 spatstat.random_2.1-0 miniUI_0.1.1.1 Rcpp_1.0.8 viridisLite_0.4.0 xtable_1.8-4 reticulate_1.24 spatstat.core_2.4-0 bit_4.0.4 DT_0.21 htmlwidgets_1.5.4 httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.2
## [55] ica_1.0-2 farver_2.1.0 pkgconfig_2.0.3 sass_0.4.0 uwot_0.1.11 deldir_1.0-6 locfit_1.5-9.4 utf8_1.2.2 here_1.0.1 labeling_0.4.2 tidyselect_1.1.2 rlang_1.0.2 reshape2_1.4.4 later_1.3.0 munsell_0.5.0 tools_4.0.1 cli_3.2.0 generics_0.1.2 broom_0.7.12 ggridges_0.5.3 evaluate_0.15 fastmap_1.1.0 yaml_2.3.5 goftest_1.2-3 knitr_1.37 bit64_4.0.5 fitdistrplus_1.1-6 RANN_2.6.1 pbapply_1.5-0 future_1.24.0 nlme_3.1-155 mime_0.12 compiler_4.0.1 plotly_4.10.0 png_0.1-7 ggsignif_0.6.3 spatstat.utils_2.3-0 bslib_0.3.1 stringi_1.7.6 highr_0.9 lattice_0.20-45 vctrs_0.3.8 pillar_1.7.0 lifecycle_1.0.1 spatstat.geom_2.3-2 lmtest_0.9-39 jquerylib_0.1.4 RcppAnnoy_0.0.19 data.table_1.14.2 irlba_2.3.5 httpuv_1.6.5 patchwork_1.1.1 R6_2.5.1 promises_1.2.0.1
## [109] KernSmooth_2.23-20 gridExtra_2.3 parallelly_1.30.0 codetools_0.2-18 MASS_7.3-55 assertthat_0.2.1 rprojroot_2.0.2 withr_2.5.0 sctransform_0.3.3 mgcv_1.8-39 hms_1.1.1 grid_4.0.1 rpart_4.1.16 rmarkdown_2.12 carData_3.0-5 Rtsne_0.15 shiny_1.7.1